## TITLE: The best of two worlds: selection strategies for vice-presidential candidates 

library(tidyverse)
library(readxl)
library(ggplot2)
library(RColorBrewer)
library(extrafont)
library(rstatix)

## Note: for more information about the dataset, see the codebook

### Importing the dataset
## The data are read from the second sheet of the workbook; the file path is
## relative, so it is resolved against the current working directory.

BRVP_dataset <- read_excel(
  path = "Brazilian Vice-Presidential Candidates dataset.xlsx",
  sheet = 2
)

## Strategy 1: TICKET BALANCE ----

## Graph 1 - Ticket balance in the selection of vice-presidential candidates ----

## 1. Counting how often each ticket-balance strategy is present (1) or absent (0)
##    Each pipeline prints one row per distinct value of the indicator,
##    with the number of rows in column `n`.

## Regional balance
BRVP_dataset %>%
  count(regional_balance)

## Age balance
BRVP_dataset %>%
  count(age_balance)

## Gender balance
BRVP_dataset %>%
  count(gender_balance)

## Ideological balance
BRVP_dataset %>%
  count(ideological_balance)

# 2. Building the data frame used to plot graph 1

  ## One small data frame per status (present / absent), with the counts
  ## typed in as fixed values.
  ## NOTE(review): presumably these figures reproduce the counts obtained
  ## from BRVP_dataset earlier in the script; confirm if the data change.

name <- c("Regional", "Age", "Gender", "Ideological")
value_presente <- c(38, 32, 20, 9)
value_ausente <- c(25, 31, 43, 54)

presente <- data.frame(Balance = name, Present = value_presente)
ausente <- data.frame(Balance = name, Absent = value_ausente)

  ## Join both tables on the balance type, reshape to long format (one row
  ## per balance type and status) and add each status' share within its
  ## balance type. The result is left grouped by `Balance`.

grafico1 <- presente %>%
  left_join(ausente, by = "Balance") %>%
  pivot_longer(
    cols = !Balance,
    names_to = "Verificacao",
    values_to = "count"
  ) %>%
  group_by(Balance) %>%
  mutate(freq = count / sum(count))

# 3. Plotting graph 1

## Dodged bars: for every balance type, the share of tickets in which the
## strategy is present or absent. Each bar carries its percentage as a label.

ggplot(
  data = grafico1,
  mapping = aes(
    x = Balance,
    y = freq,
    fill = Verificacao,
    label = scales::percent(freq),
    width = 0.8
  )
) +
  geom_col(position = "dodge") +
  theme_classic() +
  # Labels are dodged like the bars and lifted slightly above them
  geom_text(
    family = "Cambria",
    size = 4,
    vjust = -0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_grey(start = 0.35, end = 0.75) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Ticket balance",
    y = "Vice-presidential candidates",
    fill = "Strategy"
  ) +
  theme(
    text = element_text(family = "Cambria"),
    axis.title.x = element_text(
      size = 14,
      margin = margin(t = 15, r = 0, b = 10, l = 0)
    ),
    axis.title.y = element_text(
      size = 14,
      margin = margin(t = 0, r = 10, b = 0, l = 5)
    ),
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13),
    legend.title = element_text(
      size = 12,
      margin = margin(t = 0, r = 2, b = 0, l = 2)
    ),
    legend.text = element_text(size = 11)
  )

## Checking additional information

## Regional balance ----

  ## Goal: find out which regional composition is the most frequent one.
  ## Only regionally balanced tickets are kept; the vice and president regions
  ## are pasted together as "vice/president" and the combinations are counted
  ## (printed in ascending order of frequency).

BRVP_dataset %>%
  select(election_year, vice_region, president_region, regional_balance) %>%
  filter(regional_balance == 1) %>%
  unite(
    col = "regional_composition",
    vice_region, president_region,
    sep = "/",
    remove = FALSE
  ) %>%
  count(regional_composition) %>%
  arrange(n)

    ## Adding together the "Sudeste/Nordeste" and "Nordeste/Sudeste"
    ## compositions gives the most frequent combination.

## Ideological balance ----

  ## Goal: look at how the ideologically balanced tickets are composed.
  ## Balanced tickets are listed from the most recent election backwards.

BRVP_dataset %>%
  select(
    vp_party, vp_party_orientation,
    president_party, president_party_orientation,
    ideological_balance,
    election_year
  ) %>%
  filter(ideological_balance == 1) %>%
  arrange(desc(election_year))

  ## Two parties appear most often: PSDB and PT. The printed table shows how
  ## these parties balanced their tickets.

## Graph 2 - Political trajectory of vice-presidential candidates (n=63) ----

## 1. Counting how often each political-experience criterion is present (1) or absent (0)
##    Each pipeline prints one row per distinct value of the indicator,
##    with the number of rows in column `n`.

  ## Experience in the Executive branch

BRVP_dataset %>%
  count(exp_executive)

  ## Experience in the Legislative branch

BRVP_dataset %>%
  count(exp_legislative)

  ## Federal legislative leadership

BRVP_dataset %>%
  count(legislative_fed_leader)

  ## Party administration

BRVP_dataset %>%
  count(party_administration)

## 2. Building the data frame used to plot graph 2

  ## One small data frame per status (absent / present), with the counts
  ## typed in as fixed values.
  ## NOTE(review): presumably these figures reproduce the counts obtained
  ## from BRVP_dataset earlier in the script; confirm if the data change.

name <- c("Executive", "Legislative", "Leg. Leader", "Party Administration")
value_presente <- c(21, 27, 22, 20)
value_ausente <- c(42, 36, 41, 43)

nao_possui <- data.frame(Experience = name, Absent = value_ausente)
possui <- data.frame(Experience = name, Present = value_presente)

  ## Join both tables on the experience type, reshape to long format (one row
  ## per experience type and status) and add each status' share within its
  ## experience type. The result is left grouped by `Experience`.

grafico2 <- nao_possui %>%
  left_join(possui, by = "Experience") %>%
  pivot_longer(
    cols = !Experience,
    names_to = "Verificacao",
    values_to = "count"
  ) %>%
  group_by(Experience) %>%
  mutate(freq = count / sum(count))

# 3. Plotting graph 2

## Dodged bars: for every experience type, the share of candidates with and
## without that experience. Each bar carries its percentage as a label.

ggplot(
  data = grafico2,
  mapping = aes(
    x = Experience,
    y = freq,
    fill = reorder(Verificacao, -freq),
    label = scales::percent(freq),
    width = 0.8
  )
) +
  geom_col(position = "dodge") +
  theme_classic() +
  # Labels are dodged like the bars and lifted slightly above them
  geom_text(
    family = "Cambria",
    size = 4,
    vjust = -0.5,
    position = position_dodge(width = 0.9)
  ) +
  # The legend order is fixed explicitly: "Present" first, then "Absent"
  scale_fill_grey(
    start = 0.35,
    end = 0.75,
    limits = c("Present", "Absent")
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Political trajectory",
    y = "Vice-presidential candidates",
    fill = "Experience"
  ) +
  theme(
    text = element_text(family = "Cambria"),
    axis.title.x = element_text(
      size = 14,
      margin = margin(t = 15, r = 0, b = 10, l = 0)
    ),
    axis.title.y = element_text(
      size = 12,
      margin = margin(t = 0, r = 10, b = 0, l = 5)
    ),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 14),
    legend.title = element_text(
      size = 12,
      margin = margin(t = 0, r = 2, b = 0, l = 2)
    ),
    legend.text = element_text(size = 11)
  )

## Checking additional information

## Experience in the Legislative branch ----

  ## NOTE(review): rows are selected with `exp_legislative == -1`, while other
  ## indicator filters in this script (e.g. `regional_balance == 1`) use 1.
  ## Confirm against the codebook how this column is coded.

BRVP_dataset %>%
  select(election_year, exp_legislative, legislative_fed_leader) %>%
  filter(exp_legislative == -1) %>%
  count(legislative_fed_leader)

  ## Author's reading of the output: every candidate with experience in the
  ## Legislative branch passed through the National Congress.

## Senator candidates ----

  ## Among the rows kept above, retain those whose `senator` value differs
  ## from "0" and count them by `midterm_senator_mandate`.

BRVP_dataset %>%
  select(election_year, exp_legislative, senator, midterm_senator_mandate) %>%
  filter(
    exp_legislative == -1,
    senator != "0"
  ) %>%
  count(midterm_senator_mandate)

  ## Author's reading of the output: one third of the candidates who passed
  ## through the Senate were in the middle of their mandate when they were
  ## selected as vice-presidential candidates.

## Experience in the Executive branch ----

  ## NOTE(review): rows are selected with `exp_executive == -1`, while other
  ## indicator filters in this script (e.g. `regional_balance == 1`) use 1.
  ## Confirm against the codebook how this column is coded.

## Subnational executive experience among the selected candidates
BRVP_dataset %>%
  select(election_year, exp_executive, national_exec_post, subnational_exec_post) %>%
  filter(exp_executive == -1) %>%
  count(subnational_exec_post)

## National executive experience among the selected candidates.
## This pipeline previously counted `subnational_exec_post` a second time,
## so the national breakdown was never produced.
BRVP_dataset %>%
  select(election_year, exp_executive, national_exec_post, subnational_exec_post) %>%
  filter(exp_executive == -1) %>%
  count(national_exec_post)

## Graph 3 - Profile of vice-presidential candidates by ticket competitiveness ----

## Preparing the "grafico3" object for plotting
## Keeps the columns needed for the graph and adds three display labels:
##   profile - title-case version of `vpcpi_profile`
##   leading - "Leading tickets" / "Non-leading tickets" from `leading_candidates`
##   ticket  - "Mixed ticket" / "Pure ticket" from `presidential_ticket`
## Values not listed in a `case_when()` become NA.

grafico3 <- BRVP_dataset %>%
  select(
    election_year, president_party, vpcpi_profile,
    presidential_ticket, leading_candidates, vpcpi_indexvalue
  ) %>%
  mutate(
    profile = case_when(
      vpcpi_profile == "POLITICAL EXPERT" ~ "Political Expert",
      vpcpi_profile == "VOTE PULLER" ~ "Vote Puller",
      vpcpi_profile == "NO SCORE" ~ "No score",
      vpcpi_profile == "HYBRID" ~ "Hybrid"
    ),
    leading = case_when(
      leading_candidates == "1" ~ "Leading tickets",
      leading_candidates == "0" ~ "Non-leading tickets"
    ),
    ticket = case_when(
      presidential_ticket == "MIXED" ~ "Mixed ticket",
      presidential_ticket == "PURE" ~ "Pure ticket"
    )
  )

## Plotting graph 3
## Stacked bars per profile, split by leading / non-leading tickets.
## `freq` is each profile-by-leading cell's share of all counted rows,
## so the labels across the whole chart add up to 100%.

grafico3 %>%
  count(profile = factor(profile), leading) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(
    mapping = aes(
      x = reorder(profile, -freq),
      y = freq,
      fill = leading,
      label = scales::percent(freq)
    )
  ) +
  geom_col(position = "stack") +
  theme_classic() +
  # Labels are placed inside each stacked segment
  geom_text(
    family = "Cambria",
    fontface = "bold",
    size = 4.5,
    position = position_stack(vjust = 0.6)
  ) +
  scale_fill_grey(start = 0.35, end = 0.75) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Profile",
    y = "Vice-presidential candidates",
    fill = "Leading tickets"
  ) +
  theme(
    text = element_text(family = "Cambria"),
    axis.title.x = element_text(
      size = 14,
      margin = margin(t = 15, r = 0, b = 10, l = 0)
    ),
    axis.title.y = element_text(
      size = 14,
      margin = margin(t = 0, r = 10, b = 0, l = 5)
    ),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

## Independent-samples t-test ----

## Building the vectors: the whole sample vs. the vice-presidential candidates
## on tickets flagged as leading (`leading_candidates == 1`; the original note
## describes these as the 1st and 2nd placed tickets).
## NOTE(review): `pop_total` is built from every row of BRVP_dataset, so it
## also contains the rows in `leading`; the two samples overlap. Confirm that
## this comparison is the intended one.

leading <- BRVP_dataset %>%
  filter(leading_candidates == 1) %>%
  select(vpcpi_indexvalue)
leading %>% get_summary_stats(vpcpi_indexvalue)
leading.test <- leading$vpcpi_indexvalue

pop_total <- BRVP_dataset %>%
  select(vpcpi_indexvalue)
pop_total %>% get_summary_stats(vpcpi_indexvalue)
pop_total_test <- pop_total$vpcpi_indexvalue

## Running the t-test

## Welch test (unequal variances, the default)
t.test(leading.test, pop_total_test)

## Pooled-variance test; `var.equal` is a logical flag, so pass TRUE
## rather than the number 1
t.test(leading.test, pop_total_test, var.equal = TRUE)

